package com.seanyung.aimltranslator.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import org.xml.sax.SAXException;

import com.seanyung.aimltranslator.exception.RemoteServerException;

public class TransUtils {
	
	public static final int LOG_NODE_LEVEL = 5;
	public static final int RETRY_TIMES = 5;

    /**
     * 将传入的英文翻译为汉语，出错后返回原英文内容
     * @param input 要翻译的英文字符串
     * @return      翻译后的汉语
     * @throws RemoteServerException
     */
	public static String getEn2Cn(String input) throws RemoteServerException {
		String result=input;
		if(input==null||"".equals(input.trim())){
			return result;
		}
		try {
			String requstPath = "http://translate.google.cn/translate_a/t?client=t&hl=zh-CN&ie=UTF-8&oc=2&oe=UTF-8&otf=1&pc=1&sc=2&sl=en&ssel=0&tl=zh-CN&tsel=0&"
					+ "q=" + URLEncoder.encode(input, "UTF-8");
			URL url = new URL(requstPath);
			HttpURLConnection conn = (HttpURLConnection) url.openConnection();
			conn.setRequestMethod("GET");
			conn.setReadTimeout(10000);
			conn.setConnectTimeout(3000);
			conn.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0");
			if(conn.getResponseCode()!=200){
				Log.error("返回异常:代号"+conn.getResponseCode());
				Log.error("请求参数"+requstPath);
				return result;
			}
			InputStream ins = conn.getInputStream();
			BufferedReader br=new BufferedReader(new InputStreamReader(ins, "UTF-8"));
			StringBuilder sb=new StringBuilder();
			String restr=null;
			while((restr=br.readLine())!=null){
				sb.append(restr);
			 }
			result=sb.toString();
			try {
				result=URLDecoder.decode(result, "UTF-8");
			} catch (Exception e) {
				Log.error("urldecode失败:"+result);
				Log.error("已放弃处理",e);
			}
			result = result.substring(4, result.indexOf("\"", 4));
			 return result;
		} catch (Exception e) {
			Log.error("网络连接超时："+result,e);
			throw new RemoteServerException();
		}
	}
	
	/**
	 * 获取目标分词处理后的文本,词语间用分隔符连接
	 * @param string 要进行分词处理的原字符串
	 * @param separatoStringr  词语分隔符
	 * @return
	 */
	public static String cnSegment(String string,String separatoStringr){
		StringReader reader = new StringReader(string);
		IKSegmenter seg = new IKSegmenter(reader, true);
		Lexeme word = null;
		StringBuilder sBuilder=new StringBuilder();
		try {
			while((word = seg.next())!=null){
				sBuilder.append(word.getLexemeText()).append(separatoStringr);
			}
			if(sBuilder.length()>separatoStringr.length()){
				sBuilder.delete(sBuilder.length()-separatoStringr.length(), sBuilder.length());
			}
			return sBuilder.toString();
		} catch (IOException e) {
			e.printStackTrace();
		}
		
		return string;
	}

	/**
	 * 将xml文本部分进行转换并输出到新的目录
	 * @param tarPath  目标文件夹
	 * @param file   要转换的文件
	 * @param translate 是否翻译
	 * @param segment 是否进行分词 
	 * @throws ParserConfigurationException
	 * @throws FileNotFoundException
	 * @throws SAXException
	 * @throws IOException
	 * @throws TransformerException
	 * @throws RemoteServerException
	 */
	public static void transAiml(String tarPath,File file,boolean translate,boolean segment)
			throws ParserConfigurationException, FileNotFoundException,
			SAXException, IOException, TransformerException, RemoteServerException {
		Log.info("文件："+file.getName()+"  处理方式："+(translate?" 翻译":"")+(segment?" 分词":""));
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		DocumentBuilder builder = factory.newDocumentBuilder();
		Document document = builder.parse(new FileInputStream(file));
		Element element = document.getDocumentElement();
		NodeList list = element.getChildNodes();
		NodeTransHandle(list,translate,segment);
		TransformerFactory tFactory = TransformerFactory.newInstance();
		Transformer transformer = tFactory.newTransformer();
		DOMSource source = new DOMSource(document);
		File dir = new File(tarPath);
		if (!dir.exists()) {
			dir.mkdirs();
		}
		StreamResult result = new StreamResult(new File(tarPath
				+ file.getName()));
		transformer.transform(source, result);
	}

	
	private static int[] index = new int[10];
	private static int elevel = 0;

	/**
	 * 对节点进行递归操作，可选是否开启翻译和分词功能
	 * @param list  所有节点列表
	 * @param translate 是否翻译
	 * @param segment 是否启用分词
	 * @throws RemoteServerException
	 */
	private static void NodeTransHandle(NodeList list,boolean translate,boolean segment) throws RemoteServerException {
		
		//日志记录部分
		StringBuilder sb = new StringBuilder();
		for (int i = 1; i <= elevel; i++)
			sb.append("    ");
		String blank = sb.toString();
		if (list.getLength() > LOG_NODE_LEVEL) {// 当子元素数量大于10时显示处理进度
			elevel++;
			Log.log(blank + "进入" + (elevel == 1 ? "根" : ("第" + elevel + "层"))
					+ "元素处理进程");
			index[elevel] = 0;
		}
		
		//处理过程
		int len = list.getLength();
		if (len < 1) {
			return;
		} else {
			for (int i = 0; i < list.getLength(); i++) {
				if (list.getLength() > LOG_NODE_LEVEL
						&& (i * 100 / list.getLength()) > index[elevel]) {
					index[elevel] = (i * 100 / list.getLength());
					Log.log(blank
							+ "处理进度："
							+ (elevel == 1 ? "" : ("第" + elevel + "层元素完成 "
									+ index[elevel] + "%；")) + "当前文件完成 "
							+ index[1] + "%");
				}
				if (list.item(i).getChildNodes().getLength() > 1) {
					NodeTransHandle(list.item(i).getChildNodes(),translate,segment);
				} else {
					try {
						String txt=list.item(i).getTextContent();
						if (translate) {
							txt = TransUtils.getEn2Cn(txt);
						}
						if(segment){
							txt = TransUtils.cnSegment(txt, " ");
						}
						
						list.item(i).setTextContent(txt);
					} catch (RemoteServerException e) {
						for (int j = 1; j <= RETRY_TIMES; j++) {
							try {
								String txt = TransUtils.getEn2Cn(list.item(
										i).getTextContent());
								list.item(i).setTextContent(txt);
								break;
							} catch (RemoteServerException e2) {
								if (j == RETRY_TIMES) { // 连续异常退出处理
									e2.printStackTrace();
									Log.error("网络错误,请稍后重试！");
									throw e2;
								} else {
									Log.error("网络错误，正在进行第" + j + "次尝试……");
								}
							}
						}
					}
				}
			}

			
			//日志记录部分
			if (list.getLength() > LOG_NODE_LEVEL) {
				Log.log(blank
						+ (elevel == 1 ? "根" : ("第" + elevel + "层"))
						+ "元素处理完成\n\n");
				elevel--;
			}
		}
	}
	
	/**
	 * 翻译英->中aiml到指定文件目录
	 * @param targetPath
	 * @param aimlFile
	 * @throws FileNotFoundException
	 * @throws ParserConfigurationException
	 * @throws SAXException
	 * @throws IOException
	 * @throws TransformerException
	 * @throws RemoteServerException
	 */
	public static void translateAiml(String targetPath,File aimlFile) throws FileNotFoundException, ParserConfigurationException, SAXException, IOException, TransformerException, RemoteServerException{
		transAiml(targetPath, aimlFile, true, false);
	};
	
    /**
     * 对aiml进行分词处理并放入指定文件目录
     * @param targetPath
     * @param aimlFile
     * @throws FileNotFoundException
     * @throws ParserConfigurationException
     * @throws SAXException
     * @throws IOException
     * @throws TransformerException
     * @throws RemoteServerException
     */
	public static void segmentAiml(String targetPath,File aimlFile) throws FileNotFoundException, ParserConfigurationException, SAXException, IOException, TransformerException, RemoteServerException{
		transAiml(targetPath, aimlFile, false, true);
	};
    /**
     * 对aiml先翻译，在进行分词，并放到指定目录
     * @param targetPath
     * @param aimlFile
     * @throws FileNotFoundException
     * @throws ParserConfigurationException
     * @throws SAXException
     * @throws IOException
     * @throws TransformerException
     * @throws RemoteServerException
     */
	public static void translateAndSegmentAiml(String targetPath,File aimlFile) throws FileNotFoundException, ParserConfigurationException, SAXException, IOException, TransformerException, RemoteServerException{
		transAiml(targetPath, aimlFile, true, true);
	};

}
